* Start from an empty session and switch off output paging so the script runs without pausing
clear all
set more off

* Set the working directory on your computer before running this script (e.g., with -cd-);
* the data files used below are referenced by file name only, without a directory


/*
* KGSS data merge (disabled: un-comment and run once to build kgss_all, which the setup section below loads)
* Data source : ICPSR

use 34655-0001-Data.dta, clear
gen year = 2003
save kgss2003,replace

use 34660-0001-Data.dta, clear
gen year = 2004
save kgss2004, replace

use 34661-0001-Data.dta, clear
gen year = 2005
save kgss2005, replace

use 34662-0001-Data.dta, clear
gen year = 2006
save kgss2006, replace

use 34663-0001-Data.dta, clear
gen year = 2007
save kgss2007, replace

use 34664-0001-Data.dta, clear
gen year = 2008
save kgss2008, replace

use 34665-0001-Data.dta, clear
gen year = 2009
save kgss2009, replace

use 34666-0001-Data.dta, clear
gen year = 2010
save kgss2010, replace

use kgss2003, clear
forvalues i = 2004/2010 {
append using kgss`i'
}

foreach var of varlist _all{
	rename `var' `=lower("`var'")'
}

compress
save kgss_all, replace
*/

* =============================================================
* KGSS Data Setup
* =============================================================

* Load the pooled KGSS file
use kgss_all, clear

* Respondent ID variable: start from respid03 and overwrite it with respid
* wherever respid is non-missing.
* (presumably respid03 is the 2003-wave ID and respid the ID of the later waves -- confirm)
* Stored as double: the default float type cannot represent integers above
* 16,777,216 exactly, so large ID values would be silently rounded.
gen double id = respid03
replace id = respid if ~missing(respid)

* =============================================================
* Identify the children listed in the household roster (members 2 to 10)
* The relate variables hold the relationship code; 3 = child
* =============================================================

forvalues k = 2/10 {
    * child indicator, defined only when the relationship code is below 50
    generate x_child`k' = (relate`k' == 3) if relate`k' < 50

    * daughter indicator (gender code 2), defined only for children with a gender code below 3
    generate x_daughter`k' = (gender`k' == 2) if x_child`k' == 1 & (gender`k' < 3)

    * age of the child, kept only when the daughter indicator is defined and age is below 100
    generate x_age`k' = old`k' if x_child`k' == 1 & !missing(x_daughter`k') & old`k' < 100

    * flag for a gender code above 2 or an age above 99
    * NOTE(review): defined for every member with a non-missing child indicator,
    * i.e. also for non-children -- confirm this is intended
    generate x_missing`k' = (gender`k' > 2 | old`k' > 99) if !missing(x_child`k')
}

* age of the oldest child in the roster
egen x_oldage = rowmax(x_age*)

* respondent ages above 100 are treated as missing
replace age = . if age > 100

* age gap between the respondent and the oldest child
generate x_gengap = age - x_oldage

* mark every child whose age equals the oldest age (used to detect ties)
forvalues k = 2/10 {
    generate sameage`k' = (x_oldage == x_age`k') if !missing(x_oldage, x_age`k')
}

* number of flagged roster members and number of children sharing the oldest age
egen n_missing = rowtotal(x_missing*)
egen n_sameage = rowtotal(sameage*)
tab n_sameage // 17 cases are duplicates


* =====================================================================
* Major Independent Variables
* =====================================================================

* fx_daughter: daughter indicator of the child whose age equals the oldest age.
* When several children tie, the highest roster position overwrites the earlier ones.
generate fx_daughter = .
forvalues k = 2/10 {
    replace fx_daughter = x_daughter`k' if sameage`k' == 1 & !missing(x_daughter`k')
}
label variable fx_daughter "Oldest=Daughter"

* counts of daughters, sons, and roster members flagged as children
egen nx_daughter = anycount(x_daughter*), values(1)
egen nx_son      = anycount(x_daughter*), values(0)
egen nx_child    = rowtotal(x_child*)

* children with a defined daughter indicator, and the share of daughters among them
generate na_child    = nx_daughter + nx_son
generate px_daughter = nx_daughter / na_child

* Sample selection
* out_sample stays 0 for retained cases (the analysis conditions on out_sample == 0).
* Every rule applies only to cases still coded 0, so the first matching rule wins:
*   1 = no roster member flagged as a child (nx_child == 0)
*   2 = respondent age minus oldest child age is below 10
*   3 = at least one roster member flagged for gender/age (n_missing > 0)
*   4 = more than one child shares the oldest age (n_sameage > 1)
*   5 = oldest child is older than 17
* NOTE(review): Stata orders missing values above every number, so a missing x_gengap
* never triggers rule 2, while a missing x_oldage does trigger rule 5 -- confirm this is intended.
gen out_sample = 0
replace out_sample = 1 if out_sample == 0 & nx_child == 0
replace out_sample = 2 if out_sample == 0 & x_gengap < 10
replace out_sample = 3 if out_sample == 0 & n_missing > 0
replace out_sample = 4 if out_sample == 0 & n_sameage > 1
replace out_sample = 5 if out_sample == 0 & x_oldage > 17
* NOTE: Korean age reckoning uses a different criterion for counting age (one year is added)


* Dependent Variables
* =====================================================================
* Political Ideology
* partylr codes below 6 are centred at 3; d_con is the 0/1 split (values of 1 and above = 1);
* d_conscale is then rescaled by a factor of 50
generate d_conscale = partylr - 3 if partylr < 6
recode d_conscale (min/0 = 0) (1/max = 1), generate(d_con)
replace d_conscale = d_conscale / 2 * 100

* Party ID
* NOTE: the source variable and the party code differ by wave:
*       partyid3 == 2 in 2003, partyid == 2 in 2004-2009, partyid == 1 in 2010
* NOTE(review): a system-missing party variable yields 0, not missing -- confirm
generate d_hanara = .
replace d_hanara = (partyid3 == 2) if year == 2003
replace d_hanara = (partyid == 2)  if year > 2003 & year < 2010
replace d_hanara = (partyid == 1)  if year == 2010

* Set the indicator to missing for the special codes:
* 77/88/99 in 2003 and 2004, 88/99 from 2005 onwards
replace d_hanara = . if inlist(partyid3, 77, 88, 99) & year == 2003
replace d_hanara = . if inlist(partyid, 77, 88, 99) & year == 2004
replace d_hanara = . if inlist(partyid, 88, 99) & inlist(year, 2005, 2006, 2007, 2008, 2009, 2010)


* =====================================================================
* Control Variables
* age / sex / oldest age / educ
* NOTE : almost all Koreans are native born; no such indicator is found in the data set
* =====================================================================
* Years of education mapped from the educ categories; codes 8 and 88 become missing
* NOTE(review): educ codes not listed in the rules pass through unchanged -- confirm none exist
recode educ (0 = 0) (1 = 6) (2 = 9) (3 = 12) (4 = 14) (5 = 16) (6 = 18) (7 = 6) ///
    (8 88 = .), generate(c_educ)

* Despite its name, n_nonmiss holds the number of MISSING values among the controls,
* so n_nonmiss == 0 selects complete cases
egen n_nonmiss = rowmiss(age sex c_educ x_oldage)

* Keep only the variables used in the analysis and save the analytic file
keep id year age sex c_educ x_oldage fx_daughter ///
    d_conscale d_hanara out_sample n_nonmiss
save kgss_analytic, replace


* =====================================================================
* Analysis Begins here
* =====================================================================

use kgss_analytic, clear

* =====================================================================
* Table S11 : OLS results for political ideology and party identification
* =====================================================================

* controls and the estimation sample shared by all four models
local cv age sex c_educ x_oldage
local insample out_sample == 0 & n_nonmiss == 0

* For each outcome fit a model with year dummies only, then one adding the controls;
* the results are stored as m1-m4 in that order
estimates clear
local m = 0
foreach y in d_conscale d_hanara {
    regress `y' fx_daughter i.year if `insample'
    local ++m
    estimates store m`m'

    regress `y' fx_daughter `cv' i.year if `insample'
    local ++m
    estimates store m`m'
}

* esttab is not an official Stata command (it ships with the estout package)
esttab * using TS11_KGSS.csv, csv replace star(+ 0.1 * 0.05) nogap ar2 se b(%9.3f) drop(*year)



* ry numbers the survey years 1, 2, ... within the estimation sample
sort year
egen ry = group(year) if out_sample == 0 & n_nonmiss == 0

* =====================================================================
* Figure S3 : period variations : later plotted using R-codes
* =====================================================================

** LOOP
* For each outcome, regress it on fx_daughter separately within every survey year and
* write one row per year (year, coefficient, standard error, p-value, star) to
* year_LPM_KGSS_<outcome>.csv. The first rows of the data set serve as scratch storage.
foreach Y of varlist d_conscale d_hanara {
    gen o_year = .
    gen o_coef = .
    gen o_se = .
    gen o_p = .
    gen o_star = ""

    foreach X of varlist fx_daughter {
        * number of distinct years in the estimation sample
        tab year if out_sample == 0 & n_nonmiss == 0
        local ymax = `r(r)'

        forvalues i = 1/`ymax' {
            * year is constant within ry, so its mean is the calendar year
            sum year if ry == `i'
            replace o_year = `r(mean)' in `i'

            reg `Y' `X' if out_sample == 0 & ry == `i' & n_nonmiss == 0
            replace o_coef = _b[`X'] in `i'
            replace o_se = _se[`X'] in `i'
            * two-sided p-value from the t distribution
            replace o_p = 2 * ttail(e(df_r), abs(_b[`X']/_se[`X'])) in `i'
        }
    }

    * significance markers; rows with a missing o_p keep an empty star
    replace o_star = "+" if o_p < 0.1
    replace o_star = "*" if o_p < 0.05

    * the if qualifier must precede the options: export only the rows that hold results
    outsheet o_* using year_LPM_KGSS_`Y'.csv if o_coef != ., comma replace
    drop o_*
}


